{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import cPickle\n",
    "import itertools\n",
    "import datetime\n",
    "import numpy as np\n",
    "import scipy.io as sio\n",
    "import scipy.sparse as ss\n",
    "import scipy.spatial.distance as ssd\n",
    "from collections import defaultdict\n",
    "from sklearn.preprocessing import normalize\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of uniqueUsers :3391\n",
      "number of uniqueEvents :13418\n"
     ]
    }
   ],
   "source": [
    "uniqueUsers = set()\n",
    "uniqueEvents = set()\n",
    "\n",
    "eventsForUser = defaultdict(set)\n",
    "usersForEvent = defaultdict(set)\n",
    "    \n",
    "for filename in [\"train.csv\", \"test.csv\"]:\n",
    "    f = open(filename, 'rb')\n",
    "\n",
    "    f.readline().strip().split(\",\")\n",
    "    \n",
    "    for line in f: \n",
    "        cols = line.strip().split(\",\")\n",
    "        uniqueUsers.add(cols[0])   \n",
    "        uniqueEvents.add(cols[1])  \n",
    "  \n",
    "    f.close()\n",
    "\n",
    "\n",
    "n_uniqueUsers = len(uniqueUsers)\n",
    "n_uniqueEvents = len(uniqueEvents)\n",
    "\n",
    "print(\"number of uniqueUsers :%d\" % n_uniqueUsers)\n",
    "print(\"number of uniqueEvents :%d\" % n_uniqueEvents)\n",
    "\n",
    "userEventScores = ss.dok_matrix((n_uniqueUsers, n_uniqueEvents))\n",
    "userIndex = dict()\n",
    "eventIndex = dict()\n",
    "\n",
    "for i, u in enumerate(uniqueUsers):\n",
    "    userIndex[u] = i\n",
    " \n",
    "for i, e in enumerate(uniqueEvents):\n",
    "    eventIndex[e] = i\n",
    "\n",
    "n_records = 0\n",
    "ftrain = open(\"train.csv\", 'rb')\n",
    "ftrain.readline()\n",
    "for line in ftrain:\n",
    "    cols = line.strip().split(\",\")\n",
    "    i = userIndex[cols[0]] \n",
    "    j = eventIndex[cols[1]]\n",
    "    \n",
    "    eventsForUser[i].add(j)   \n",
    "    usersForEvent[j].add(i)    \n",
    "    score = int(cols[4])\n",
    "  \n",
    "    userEventScores[i, j] = score\n",
    "ftrain.close()\n",
    "\n",
    "\n",
    "cPickle.dump(eventsForUser, open(\"PE_eventsForUser.pkl\", 'wb'))\n",
    "\n",
    "cPickle.dump(usersForEvent, open(\"PE_usersForEvent.pkl\", 'wb'))\n",
    "\n",
    "sio.mmwrite(\"PE_userEventScores\", userEventScores)\n",
    "\n",
    "\n",
    "cPickle.dump(userIndex, open(\"PE_userIndex.pkl\", 'wb'))\n",
    "\n",
    "cPickle.dump(eventIndex, open(\"PE_eventIndex.pkl\", 'wb'))\n",
    "\n",
    "\n",
    "uniqueUserPairs = set()\n",
    "uniqueEventPairs = set()\n",
    "for event in uniqueEvents:\n",
    "    i = eventIndex[event]\n",
    "    users = usersForEvent[i]\n",
    "    if len(users) > 2:\n",
    "        uniqueUserPairs.update(itertools.combinations(users, 2))\n",
    "        \n",
    "for user in uniqueUsers:\n",
    "    u = userIndex[user]\n",
    "    events = eventsForUser[u]\n",
    "    if len(events) > 2:\n",
    "        uniqueEventPairs.update(itertools.combinations(events, 2))\n",
    "\n",
    "cPickle.dump(uniqueUserPairs, open(\"FE_uniqueUserPairs.pkl\", 'wb'))\n",
    "cPickle.dump(uniqueEventPairs, open(\"PE_uniqueEventPairs.pkl\", 'wb'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from utils import FeatureEng"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of records :3137973\n"
     ]
    }
   ],
   "source": [
    "lines = 0\n",
    "fin = open(\"events.csv\", 'rb')\n",
    "\n",
    "for line in fin:\n",
    "    cols = line.strip().split(\",\")\n",
    "    lines += 1\n",
    "fin.close()\n",
    "\n",
    "print(\"number of records :%d\" % lines)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of events in train & test :13418\n"
     ]
    }
   ],
   "source": [
    "eventIndex = cPickle.load(open(\"PE_eventIndex.pkl\", 'rb'))\n",
    "n_events = len(eventIndex)\n",
    "\n",
    "print(\"number of events in train & test :%d\" % n_events)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/anaconda2/lib/python2.7/site-packages/scipy/spatial/distance.py:698: RuntimeWarning: invalid value encountered in double_scalars\n",
      "  dist = 1.0 - uv / np.sqrt(uu * vv)\n"
     ]
    }
   ],
   "source": [
    "FE = FeatureEng()\n",
    "\n",
    "fin = open(\"events.csv\", 'rb')\n",
    "\n",
    "\n",
    "fin.readline()\n",
    "\n",
    "eventPropMatrix = ss.dok_matrix((n_events, 7))\n",
    "\n",
    "eventContMatrix = ss.dok_matrix((n_events, 101))\n",
    "\n",
    "for line in fin.readlines():\n",
    "    cols = line.strip().split(\",\")\n",
    "    eventId = str(cols[0])\n",
    "    \n",
    "    if eventIndex.has_key(eventId): \n",
    "        i = eventIndex[eventId]\n",
    "  \n",
    "       \n",
    "        eventPropMatrix[i, 0] = FE.getJoinedYearMonth(cols[2]) \n",
    "        eventPropMatrix[i, 1] = FE.getFeatureHash(cols[3]) \n",
    "        eventPropMatrix[i, 2] = FE.getFeatureHash(cols[4]) \n",
    "        eventPropMatrix[i, 3] = FE.getFeatureHash(cols[5]) \n",
    "        eventPropMatrix[i, 4] = FE.getFeatureHash(cols[6]) \n",
    "        eventPropMatrix[i, 5] = FE.getFloatValue(cols[7]) \n",
    "        eventPropMatrix[i, 6] = FE.getFloatValue(cols[8]) \n",
    "        \n",
    "        for j in range(9, 109):\n",
    "            eventContMatrix[i, j-9] = cols[j]\n",
    "fin.close()\n",
    "\n",
    "\n",
    "eventPropMatrix = normalize(eventPropMatrix,\n",
    "    norm=\"l2\",copy=False)\n",
    "sio.mmwrite(\"EV_eventPropMatrix\", eventPropMatrix)\n",
    "\n",
    "\n",
    "eventContMatrix = normalize(eventContMatrix,\n",
    "    norm=\"l2\", copy=False)\n",
    "sio.mmwrite(\"EV_eventContMatrix\", eventContMatrix)\n",
    "\n",
    "\n",
    "\n",
    "eventPropSim = ss.dok_matrix((n_events, n_events))\n",
    "eventContSim = ss.dok_matrix((n_events, n_events))\n",
    "\n",
    "uniqueEventPairs = cPickle.load(open(\"PE_uniqueEventPairs.pkl\", 'rb'))\n",
    "\n",
    "for e1, e2 in uniqueEventPairs:\n",
    "\n",
    "    i = e1\n",
    "    j = e2\n",
    "\n",
    "    if not eventPropSim.has_key((i,j)):\n",
    "        epsim = 1 - ssd.correlation(eventPropMatrix.getrow(i).todense(),\n",
    "            eventPropMatrix.getrow(j).todense())\n",
    "        \n",
    "        eventPropSim[i, j] = epsim\n",
    "        eventPropSim[j, i] = epsim\n",
    "    \n",
    "\n",
    "    if not eventContSim.has_key((i,j)):\n",
    "        ecsim = 1 - ssd.cosine(eventContMatrix.getrow(i).todense(),\n",
    "            eventContMatrix.getrow(j).todense())\n",
    "    \n",
    "        eventContSim[i, j] = epsim\n",
    "        eventContSim[j, i] = epsim\n",
    "    \n",
    "sio.mmwrite(\"EV_eventPropSim\", eventPropSim)\n",
    "sio.mmwrite(\"EV_eventContSim\", eventContSim)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "matrix([[0., 0., 0., ..., 0., 0., 0.]])"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "eventPropSim.getrow(0).todense()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn import metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "import scipy.io as sio\n",
    "eventContMatrix = sio.mmread(\"EV_eventContMatrix\") "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.cluster import MiniBatchKMeans\n",
    "\n",
    "def K_cluster_analysis(K, df):\n",
    "    print(\"K-means begin with clusters: {}\".format(K));\n",
    "\n",
    "    km = MiniBatchKMeans(n_clusters = K)\n",
    "    km.fit(df)\n",
    "\n",
    "    cluster_result = km.predict(df)\n",
    "\n",
    "\n",
    "    CH_score = metrics.silhouette_score(df,cluster_result)   \n",
    "    print(\"CH_score: {}\".format(CH_score))\n",
    "\n",
    "    return CH_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "K-means begin with clusters: 10\n",
      "CH_score: 0.168152742783\n",
      "K-means begin with clusters: 20\n",
      "CH_score: 0.158096004847\n",
      "K-means begin with clusters: 30\n",
      "CH_score: 0.162272557986\n",
      "K-means begin with clusters: 40\n",
      "CH_score: 0.131098097612\n",
      "K-means begin with clusters: 50\n",
      "CH_score: 0.135001189796\n",
      "K-means begin with clusters: 60\n",
      "CH_score: 0.163266569437\n",
      "K-means begin with clusters: 70\n",
      "CH_score: 0.146822122796\n",
      "K-means begin with clusters: 80\n",
      "CH_score: 0.101229395195\n",
      "K-means begin with clusters: 90\n",
      "CH_score: 0.121702638834\n",
      "K-means begin with clusters: 100\n",
      "CH_score: 0.123667501055\n"
     ]
    }
   ],
   "source": [
    "CH_scores = []\n",
    "Ks = [10,20,30,40,50,60,70,80,90,100]\n",
    "for K in Ks:\n",
    "    ch = K_cluster_analysis(K, eventContMatrix)\n",
    "    CH_scores.append(ch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[<matplotlib.lines.Line2D at 0x1a21091650>]"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD8CAYAAACb4nSYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzt3Xl8VPX1//HXAQREpUIJiiCCRVSK1iUgiuKkbqgVbEXFLeBGW8vv61JbqXUrtdW61NqvC+BS6walLi0qbrWKS9USwKKBYhERIiqxiigoEPj8/jiTLyGE5CaZmTuZ+34+Hnkkc+fOnZPJzZk757NZCAEREUmGVnEHICIiuaOkLyKSIEr6IiIJoqQvIpIgSvoiIgmipC8ikiBK+iIiCaKkLyKSIEr6IiIJ0ibuAGrr0qVL6NWrV9xhiIi0KLNmzfo4hFDU0H6Rkr6ZDQVuBloDd4YQrq11/xDgd8DewMgQwkPp7SXATTV23SN9/1+29Fy9evWirKwsSlgiIpJmZu9F2a/BpG9mrYFbgSOACmCmmU0LIcyrsdsSYDRwcc3HhhCeB/ZJH6czsBB4JkpgIiKSeVGu9AcCC0MIiwDMbAowHPi/pB9CWJy+b0M9xxkBPBlCWN3kaEVEpFmiNOR2B5bWuF2R3tZYI4HJTXiciIhkSJSkb3Vsa9R8zGbWDdgLeHoL948xszIzK6usrGzMoUVEpBGiJP0KYOcat3sAyxr5PCcBj4YQ1tV1ZwhhUgihOIRQXFTUYOOziIg0UZSkPxPYzcx6m1lbvEwzrZHPcwoq7YiIxK7BpB9CqALG4qWZ+cDUEEK5mY03s2EAZjbAzCqAE4GJZlZe/Xgz64V/UpiR+fBFRKQxLN+WSywuLg5N6ae/fj2MGwc/+hFobJeIJI2ZzQohFDe0X8FMw/DOO3DnnXDggTBrVtzRiIjkp4JJ+n37wiuvQNu2cOihMH163BGJiOSfgkn6AP36wauv+hvAsGFwxx1xRyQikl8KKukD7LQTzJgBRxwBY8bAZZdBnjVbiIjEpuCSPsB228G0aXD22fCrX8GoUbB2bdxRiYjEL++mVs6Urbby8s4uu8AVV8CyZfDww/C1r8UdmYhIfArySr+aGVx+Odxzj5d8Dj4Yli5t8GEiIgWroJN+tVGjvDfPe+95l865c+OOSEQkHolI+uANuy+/7D8ffDD87W/xxiMiEofEJH2AvfeG117zOv/RR8Mf/xh3RCIiuZWopA/Qo4df8R96KIweDb/8pbp0ikhyJC7pg/fgmT4dzjjDe/aMGQPr6pz0WUSksBRsl82GtG3r5Z2ePb0vf0UFTJ3qffxFRApVIq/0q5nB1VfDxInw7LNe8vngg7ijkkK2fn3cEUjSJTrpVxszxkfwvv02DBoE8+Y1/BiRxlq1yueFuuyyuCORJFPSTzvmGB/AtWYNDB7sP4tk0pQpsGiRlxP//ve4o5GkUtKvYf/9vUvnjjvCkUf6P2lLtX49rFwZdxRS04QJsOeefrU/ejSsWBF3RJJESvq19Orl8/IfcACccgpcd13L6tJZXu4riO2yC3TvDpWVcUck4Av7lJXBeefBfff5XFBjx8YdlSSRkn4dOneGZ56Bk06CSy7xf858boD76CP43e9gv/2gf3+44QbYfXf44ouW/WmlkEycCFtvDaefDgMH+pxQDzwAf/pT3JFJ0ijpb0H79jB5MvzkJ3DbbfC978Hq1XFHtdGXX3pCP+YYv6K/8EJo1cqT//vvw3PP+ZuARh3H77PP4MEH/ZPj9tv7tksv9eT/wx/630skV5T069GqlZd3brkFHnsMSkpg+fL44tmwAV54wdcJ2GEHTyJvvulvTOXlXj44/3y/D6C01MsK5eXxxSx+Rb9qFfzgBxu3bbWVl3nWrIEzz/S/rUguREr6ZjbUzBaY2UIzG1fH/UPMbLaZVZnZiFr39TSzZ8xsvpnNM7NemQk9d370I3j0UU+wBx7oXTtz6d//hp//HHr39jeeqVPhhBP8av699+Caa3ypyNpOOQVat/bkIvEIwRtw99sPios3va9vX7jxRh8jcttt8cQnCRRCqPcLaA28A+wKtAX+BfSrtU8vYG/gXmBErfteAI5I/7wt0KG+59t///1DvnrttRC6dAnh618P4ZVXsvtcy5eH8PvfhzBgQAgQQqtWIQwdGsIDD4SwalX043znOyF07x5CVVX2YpUt+8c//O83cWLd92/YEMLRR4fQvn0I8+blNjYpLEBZaCCfhxAiXekPBBaGEBaFENYCU4Dhtd44FocQ5gKbfEg1s35AmxDCs+n9vggh5FFlvHEOOMAXXu/UCQ47DB55JLPH/+or+POffVH3nXaC//kfnxPoxht9mognn4RTT4UOHaIfs7TUa8bPP5/ZWCWaCRN8ao9TTqn7fjO46y7YZhtv5NWynpJtUZJ+d6DmelMV6W1R9AVWmNkjZjbHzK43s9a1dzKzMWZWZmZllXnex7BPH/jHP2CffWDECLj55uYdLwR46SUfFbzjjt5jaNYsb5idOxfmzIGLLoJu3Zp2/OOO8wnm7r23eXFK433yiZfiTj+9/jmdunWDSZNg9myf9VUkm6IkfatjW9Se622AQ4CLgQF4iWj0ZgcLYVIIoTiEUFxUVBTx0PEpKvJ6+vDhcMEFnqAb2xD3n//4DJ/f+AYMGeK9O4YN866iS5Z4A/JeezU/1vbt4eSTfX3gL75o/vEkunvv9U9v3/9+w/t+73s+YOvXv/ZPkyLZEiXpVwA717jdA1gW8fgVwJx0aagK+AuwX+NCzE8dOsBDD8H/+3/eTfLkk/0fvD7//a832B14oDfiXX21f3K491748EP/fsQR3viaSaWl3t000+Uo2bLqBtxBg+Bb34r2mJtvhp139im/9QYt2RIl6c8EdjOz3mbWFhgJTIt4/JlAJzOrvnz/NlAw05m1bu3/qDfe6G8Ahx/uib2mNWs82X73u/4x/kc/8n/o667zRdqfecb/ybfdNntxHnQQ7LqrSjy5NGMGLFiwaTfNhnTs6H+jRYu8pCeSFVFae4FjgLfxXjw/T28bDwxL/zwAv6pfBfwXKK/x2COAucCbwD1A2/qeK59779Rn6tQQ2rULoW/fEN55x3tt/OAHIXTq5L03dtwxhIsuCmHOHO+xkWtXXRWCWQhLluT+uZNo5MgQtt8+hNWrG//Yn/7Uz5lp0zIflxQuIvbesZBnE8sUFxeHsrKyuMNokpdf9rr8ypU+bcPWW/sV/hln+KeANjEuWbNokbcfXHONz80j2bN8uS/Led55XvprrDVrvKfYBx/42JCuXTMfoxQeM5sVQihuaD+NyM2ggw/2nj3nngt/+IPPifPAAzB0aLwJH7y8c/DBXj7Is/f5gvOHP3hX2ygNuHVp1w7uv99n4RwzRn8vySwl/QzbYw+4/XbviZFvSy+WlsL8+d4lVLJjwwbvfjlkiE+j3FT9+/unsr/+Fe6+O3PxiSjpJ8iJJ/pVpBp0s+dvf/NSWmMacLfkggt82o3zz4d33mn+8URAST9Rtt/exxZMnqyRn9kyYQJ06eL97purVSu45x4vDZaW5vf03tJyKOknTGkpfPwxPPVU3JEUnmXLfK3ls87yT1SZ0LMn3HqrtxVdd11mjinJpqSfMEce6b1BVOLJvLvu8qvxc8/N7HFPPdWn57jiCp+qQaQ5lPQTZqutPIk89pjPDSOZsX493HGHj6ju0yezxzbzzgFdu/o8Pl9+mdnjS7Io6SdQaanX9KdOjTuSwvHkkz7COhMNuHXp3Nnr+/Pnw89+lp3nkGRQ0k+gffbxLoEq8WTOhAk+S+pxx2XvOY44wud6uvlm7yUk0hRK+glk5lf7r77qs31K87z3HkyfDuec4+WzbLr2Wh8LMno0fPppdp9LCpOSfkKddpp3CdRSis13xx3+RprpBty6dOjgo3U/+sineRBpLCX9hNppJ58P6L77tCh3c6xb5712jj7au1fmwv77w1VXwZQpPuZCpDGU9BOstBQWL/aJ4qRppk3ztRCy1YC7JZdc4usynHeeNyCLRKWkn2DHH+/z+KtBt+kmTPCFT44+OrfP26aN/93WrYMzz9SnNYlOST/BttnG1/mdOlV9v5ti4ULvRTNmTOZXO4uiTx+46SZfuvP3v8/980vLpKSfcKWl8PnnPpujNM6kSZ7szzorvhjOOce7iY4bB+Xl8cUhLYeSfsIdeqiXJ1TiaZw1a3ze/OHDvVE8Lmbee6hjRx+tq4n0pCFK+gnXqpWv7PX0094gKdE88ohPXJfrBty67LCDJ/433vBePSL1UdIXzjjDGwIffDDuSFqOCRN8NbLDDos7Ejd8OJx9NvzmN+qNJfVT0hf22AMGDlSJJ6p58+DFF305xFZ59B90003Qq5e306xcGXc0kq8inbJmNtTMFpjZQjPbbFltMxtiZrPNrMrMRtS6b72ZvZH+mpapwCWzSkvhX//yL6nfpEk+3cKZZ8Ydyaa2287fuN97Dy68MO5oJF81mPTNrDVwK3A00A84xcz61dptCTAaqKtA8GUIYZ/017BmxitZcvLJnsg0LUP9Vq+GP/4RTjgBiorijmZzgwd7T56774a//CXuaCQfRbnSHwgsDCEsCiGsBaYAw2vuEEJYHEKYC2iISAvVpQsceyw88ABUVcUdTf6aOhVWrMiPBtwtufJK2G8/nwvoo4/ijkbyTZSk3x2oOdC7Ir0tqvZmVmZmr5nZ8Y2KTnKqtNR78Gja3i2bONHbQIYMiTuSLWvb1j+xffGFN+6GEHdEkk+iJH2rY1tjTqOeIYRi4FTgd2b2jc2ewGxM+o2hrLKyshGHlkw65hhfrEMNunV74w147TVvwLW6/ivySL9+3pPniSe8O6dItShJvwLYucbtHsCyqE8QQliW/r4IeAHYt459JoUQikMIxUX5WChNiHbtYORIePRR9f6oy8SJ0L69fyJqCcaO9ZlUL7zQp4wQgWhJfyawm5n1NrO2wEggUi8cM+tkZu3SP3cBBgPzmhqsZF9pKXz1FTz0UNyR5JfPP/d57E8+2T8NtQStWvmo4bZtfSyG2moEIiT9EEIVMBZ4GpgPTA0hlJvZeDMbBmBmA8ysAjgRmGhm1bOA7AmUmdm/gOeBa0MISvp5bOBA6NtXJZ7aJk/2Gnk+N+DWpUcPH0j22mu+6paIhTxr5SkuLg5lZWVxh5Fov/oVXHYZvPuuD/ZJuhB84ZL1672un+/1/Lqcdpr3PHr1VSgujjsayQYzm5VuP61XHo0nlHxx+un+/f77440jX8ycCXPm+FV+S0z4ALfc4gu3n366jzWQ5FLSl83ssgukUl7iybMPgrGYONHXHjjttLgjabpOneCee2DBAl91S5JLSV/qVFoK//kPvP563JHEa8UKr+efeqpPX9ySHXaY9+S55RafVVWSSUlf6nTCCbD11mrQve8+X1Xs+9+PO5LM+PWvvQ//mWfCJ5/EHY3EQUlf6tSxI3z3uzBlii8YkkQheGlnwABvyC0E7dt7meeDD/xvK8mjpC9bVFoKn37qozqT6JVXfAnCQrnKr1Zc7KulvfBC3JFIHJT0ZYsOOwy6dUtuiWfCBP/EM3Jk3JFklpk31L/wghrqk0hJX7aoTRvvsfLEE740YJJ8/LGPSi4t9Z47hSaVgspKmD8/7kgk15T0pV6lpT58f/LkuCPJrT/+0dsyCq20Uy2V8u/PPx9rGBIDJX2p1157wT77JKvEU92AO3gw9O8fdzTZ0bs39Oypun4SKelLg0pLoazM14ZNguef9zEKLW2encZQXT+5lPSlQaecAq1bJ2cpxQkTfCbNESMa3rclS6W87aK8vMFdpYAo6UuDdtwRjjrK5+JZvz7uaLLrww99PYHRo71PeyErKfHvKvEki5K+RFJaChUVhZ8g7r7bG67HjIk7kuzr1cvnWSr0v6lsSklfIhk2zPusF3KD7vr1vrTgt78Nu+8edzS5kUrBjBmwYUPckUiuKOlLJFtvDSedBA8/7IuJFKJnnoHFiwu3m2ZdSkpU108aJX2JrLQUVq3ymnchmjABunaF44+PO5LcOfRQ/64ST3Io6Utkgwd7/+5CLPFUVMDjj8PZZ/uasknRq5d/Keknh5K+RNaqlS+w/dxzniQLyZ13en/1c8+NO5Lcq+6vr7p+MijpS6OccYYnxwceiDuSzKmq8gbco47yTzJJU1Lic+u/9VbckUguKOlLo/TpAwcdVFhLKT7xBCxbVtgjcOujun6yREr6ZjbUzBaY2UIzG1fH/UPMbLaZVZnZZuMYzayjmb1vZrdkImiJV2mpT8kwe3bckWTGhAnQvTsce2zckcRjl138E46SfjI0mPTNrDVwK3A00A84xcz61dptCTAaeHALh/klMKPpYUo+Oekkb+wshAbdd9/19WLPOcenkk6qkhL110+KKFf6A4GFIYRFIYS1wBRgeM0dQgiLQwhzgc1OGTPbH9gBeCYD8Uoe6NTJB2s9+CCsWxd3NM1zxx0++dg558QdSbxSKa/rv/lm3JFItkVJ+t2BpTVuV6S3NcjMWgE3Aj9pYL8xZlZmZmWVlZVRDi0xKy31QT1PPRV3JE23di3cdRccdxz06BF3NPFSXT85oiR9q2Nb1Ca884DpIYSl9e0UQpgUQigOIRQXFRVFPLTEaehQKCpq2SWev/wFli9P1gjcLenZE3bdVYuqJEGUKmYFsHON2z2AZRGPfyBwiJmdB2wLtDWzL0IImzUGS8uy1VZw6qlw++2+eHqnTnFH1HgTJvjApCOPjDuS/FBSAo884nX9VurXV7Ci/GlnAruZWW8zawuMBKZFOXgI4bQQQs8QQi/gYuBeJfzCUVrqJZKpU+OOpPEWLPCr2jFjfK0A8br+p5/C3LlxRyLZ1GDSDyFUAWOBp4H5wNQQQrmZjTezYQBmNsDMKoATgYlmpumbEmDffeGb32yZJZ5Jk7y3zplnxh1J/qheN1d1/cJmIc9G2BQXF4eysrK4w5CIrrsOLrnElxfs0yfuaKL56ivvl3/YYS3zU0o27bYb9OsHf/1r3JFIY5nZrBBCcUP7qXInzXLaad7lsSUtpfjQQ949MakjcOuTSsGLLxb+CmlJpqQvzdK9Oxx+uJd4WsrAngkT/Iq2erlA2SiVghUrVNcvZEr60mylpb74yCuvxB1Jw956y+P8/vf9E4psqrqur66bhUtJX5rtu9+FbbZpGQ26EydCu3YwalTckeSn7t39U5AacwuXkr402zbbwIgR3ij65ZdxR7Nlq1b5G9OJJ0KXLnFHk79U1y9sSvqSEaWlsHIlTIs0giMeU6Z4jBqBW79UCj77DN54I+5IJBuU9CUjUinYeef8LvFMnOjjCgYPjjuS/Kb++oVNSV8yolUrOP10n6b4ww/jjmZzs2bBzJneTVMNuPXbaSfo21dJv1Ap6UvGnHGG14EnT447kk0tXAhXXglbb+1vTNIw1fULl5K+ZMyee8KAAflR4nnvPbj+eigu9t4oTzwBF18M228fd2QtQyrl7R9z5sQdiWSakr5kVGmpNwDGMbhn2TK4+WY48ECfPfOnP/Wy0w03wJIlMH587mNqqVTXL1xK+pJRI0f6RGa5mpZh+XK47TZfBKRHD7jgAu82es018M478M9/wo9/7I3MEl23brD77kr6hSjBq4JKNnTp4guM33+/J95srDv7ySc+7/uf/gR//7tP/7DnnnDVVXDyyZ6spPlSKV8Ss6oq2esHFxpd6UvGlZZ6D57nnsvcMT/7zNsKjjkGdtgBzj3Xp3742c+8lFReDldcoYSfSSUl8PnnqusXGr1/S8Yde6yvpHXvvXDUUU0/zhdfwOOP+6CqJ5/0BVt22QUuvNDLSPvuq+6X2VRz3dwBA2INRTJISV8yrl07T8r33OM9QDp2jP7YL7+E6dO9dPP44357p53gvPO8dHPAAUr0ubLjjrDHHp70f/KTuKORTFF5R7KitNQT9sMPN7zvmjXw2GPeh75rV5/HZ8YMX9VqxgxYuhRuugkGDVLCz7VUCl56yev6UhiU9CUrDjjA+8dvqc/+unU+evess7xGP2yYl3BGjoRnn4X334dbb4UhQ7RId5yq6/qzZ8cdiWSKyjuSFWZ+tX/55T5QapddfHTniy96jf7hh+G///XSz/HHe7I//HDYaqu4I5eaatb1Bw6MNRTJEK2RK1mzeDH07u1X8x06+DKFH37oUzEfd5wn+qOOgvbt445U6tOvn79pP/lk3JFIfTK6Rq6ZDTWzBWa20MzG1XH/EDObbWZVZjaixvZdzGyWmb1hZuVmplVJE6RXL79SvPtuuPNOn91y6lQfUDV5MgwfroTfEpSUwMsve0lOWr4Gyztm1hq4FTgCqABmmtm0EMK8GrstAUYDF9d6+AfAQSGENWa2LfBW+rHLMhK95L077/R+3kOHwnbbxR2NNEUq5aOeZ8/2thpp2aLU9AcCC0MIiwDMbAowHPi/pB9CWJy+b5OlsUMIa2vcbIcajhOnTx//kparZl1fSb/li5KEuwNLa9yuSG+LxMx2NrO56WP8Rlf5Ii1L165e19di6YUhStKvq2d05NbfEMLSEMLeQB9glJntsNkTmI0xszIzK6usrIx6aBHJEdX1C0eUpF8B1JyjsAfQ6Kv19BV+OXBIHfdNCiEUhxCKi4qKGntoEcmyVMoXlp81K+5IpLmiJP2ZwG5m1tvM2gIjgUjLX5tZDzPbOv1zJ2AwsKCpwYpIPKrr+irxtHwNJv0QQhUwFngamA9MDSGUm9l4MxsGYGYDzKwCOBGYaGbl6YfvCbxuZv8CZgA3hBDezMYvIiLZU1QE/ftrfv1CEGlEbghhOjC91rYravw8Ey/71H7cs8DezYxRRPJAKuVjLtat08jplkxdKEUkklQKVq8GDZhv2ZT0RSQS1fULg5K+iETSpQvstZfq+i2dkr6IRJZKwSuv+Cpm0jIp6YtIZNV1/Zkz445EmkpJX0QiqzkPj7RMSvoiEtnXvw57762k35Ip6YtIo6iu37Ip6YtIo6RSvuj9P/8ZdyTSFEr6ItIohx7qayCrxNMyKemLSKN07qy6fkumpC8ijVZd11+zJu5IpLGU9EWk0UpK4KuvVNfPpFWrYMmS7D9PpFk2RURqOuSQjXX9QzZbFknqs3YtLFgAb7216de778JBB/kKZdmkpC8ijda5M3zrWz752uWXxx1Nflq/HhYt2jy5v/02VFX5Pq1bQ9++sP/+MGqUf882JX0RaZKSErj9di/ztG8fdzTxCQGWLoXy8k2T+7x5/tpU23VXX4jm+OP9+ze/CbvvDu3a5TZeJX0RaZJUCm66yev6Q4bEHU1uLF+++ZV7eTmsXLlxn5128qR+3nn+vX9/2HNP2Hbb+OKuSUlfRJqkZl2/0JL+Z59tfuX+1ltQWblxn86dfarpM87YmNy/+U3o1Cm+uKNQ0heRJunUCfbZx+v6V1zR8P75atUqeOihTZN7RcXG+7fd1pP5sGEbk3v//rDDDv6m19Io6YtIk5WUwK23tuy6fmkpPPKI19b33NPLVtVX7f37Q8+e0KqAOrcr6YtIk6VS8Nvfwuuvb5x2uSV58UVP+Jdf7p9W2iQgI0Z6/zKzoWa2wMwWmtm4Ou4fYmazzazKzEbU2L6Pmb1qZuVmNtfMTs5k8CISr0MO8avglrhu7oYNcNFF0L07jBuXjIQPEa70zaw1cCtwBFABzDSzaSGEeTV2WwKMBi6u9fDVQGkI4T9mthMwy8yeDiGsyEj0IhKr7beHffdtmfPwPPggzJoF994LHTrEHU3uRLnSHwgsDCEsCiGsBaYAw2vuEEJYHEKYC2yotf3tEMJ/0j8vA5YDRRmJXETyQioFr722aZ/0fPfll3DppT4Y6rTT4o4mt6Ik/e7A0hq3K9LbGsXMBgJtgXfquG+MmZWZWVllzT5RIpL3UimfeO211+KOJLqbbvIBVTfeWFiNtFFE+XXr6pQUGvMkZtYNuA84M4Swofb9IYRJIYTiEEJxUZE+CIi0JC2trv/RR3DNNTB8eMtsfG6uKEm/Ati5xu0ewLKoT2BmHYEngMtCCC3oWkBEovja12C//VpOXf/KK70Udd11cUcSjyhJfyawm5n1NrO2wEhgWpSDp/d/FLg3hPDnpocpIvmsuq7/5ZdxR1K/t96CO+7wKRL69o07mng0mPRDCFXAWOBpYD4wNYRQbmbjzWwYgJkNMLMK4ERgopmVpx9+EjAEGG1mb6S/9snKbyIisUmlfMrgV1+NO5L6/eQn0LFjyx5B3FyReqaGEKYD02ttu6LGzzPxsk/tx90P3N/MGEUkz1XX9V94Ab797bijqdszz8BTT8ENN8DXvx53NPFJWLu1iGRDx47e/TFf6/rr18PFF/v0xmPHxh1NvJT0RSQjUimfjmH16rgj2dwf/gBvvgnXXpv7+evzjZK+iGREvtb1v/jC59Y56CAYMaLh/Qudkr6IZMTBB/vyf/lW4rnuOvjwQx+I1RKnQs40JX0RyYh8rOtXVHjD7ciRMGhQ3NHkByV9EcmYfKvr//znPpvmNdfEHUn+UNIXkYwpKYF16+Af/4g7Epg922fQPP986NUr7mjyh5K+iGTM4MH5UdcPAX78Y+jSxWfTlI0SsmyAiOTCdttBcXH8Sf+xxzyGW27xuYFkI13pi0hGpVLwz3/6guNxWLfOp1vYYw8YMyaeGPKZkr6IZFTcdf0JE+Dtt+H662GrreKJIZ8p6YtIRsVZ11+xAn7xC5//59hjc//8LYGSvohk1LbbwoAB8Syq8qtfwSefaCBWfZT0RSTjSkpg5kyfAiFX3n0Xfv97GDUK9tEE7lukpC8iGZdKQVVVbuv648ZBmzZw9dW5e86WSElfRDLuoIM8AeeqxPPqqzB1qvfa6d49N8/ZUinpi0jGVdf1c9GYGwJcdBF06+ZJX+qnpC8iWZGruv7Uqb4+79VXwzbbZPe5CoGSvohkRSrlK1a98kr2nuOrr7yWv/fe3oArDVPSF5GsOOggHxyVzbr+//4vLF7sXTRbt87e8xSSSEnfzIaa2QIzW2hm4+q4f4iZzTazKjMbUeu+p8xshZk9nqmgRST/bbMNDByYvbr+xx97v/xjjoHDD8/OcxSiBpPv/e4YAAAIrUlEQVS+mbUGbgWOBvoBp5hZv1q7LQFGAw/WcYjrgTOaF6aItESpFJSVweefZ/7Yv/iFtxdcf33mj13IolzpDwQWhhAWhRDWAlOA4TV3CCEsDiHMBTbUfnAI4TkgC39yEcl31XX9l1/O7HH//W+4/XafUK1f7UtQqVeUpN8dWFrjdkV6m4hIvarr+pku8fz0p9ChA1x1VWaPmwRRkn5dM1iETAZhZmPMrMzMyiorKzN5aBGJUYcOcMABmU36zz/v8+Vfeil07Zq54yZFlKRfAexc43YPYFkmgwghTAohFIcQiouKijJ5aBGJWSoFs2bBypXNP9aGDb4iVs+ecMEFzT9eEkVJ+jOB3cyst5m1BUYC07IblogUikzW9e+7D+bM8YXO27dv/vGSqMGkH0KoAsYCTwPzgakhhHIzG29mwwDMbICZVQAnAhPNrLz68Wb2EvBn4DAzqzCzo7Lxi4hIfjrwQGjbtvklntWr4ec/926gI0dmJLREirRGbghhOjC91rYravw8Ey/71PXYQ5oToIi0bJmq6994I7z/PvzpT9BKw0qbTC+diGRddV3/s8+a9vgPPoDf/AZOOMFX5pKmU9IXkaxLpbwRtql1/csvh7VrPfFL8yjpi0jWNaeuP3cu3H03jB0L3/hGxkNLHCV9Ecm6rbeGQYMan/RDgIsvhu23h8suy0poiaOkLyI5kUrB7NmNq+s/9RQ8+yxceSV07py10BJFSV9EcqKkxOv6L70Ubf+qKr/K79MHfvjD7MaWJEr6IpITgwZBu3bRSzx33QXz5sF113l7gGSGkr6I5ET79p74oyyqsnKl99g55BA4/vjsx5YkSvoikjOplE+jsGJF/ftdey1UVsJvfwtW15SP0mRK+iKSMyUl3iOnvrr+kiVw001w2mlQXJy72JJCSV9EcuaAAxqu6196qX//9a9zElLiKOmLSM60b+8DtbZU1585Ex54AC66yKdPlsxT0heRnCopgTfegE8/3XR7CD5XfteuMG5cPLElgZK+iORUKlV3Xf/RR33b+PGw3XaxhJYISvoiklMDB3qZp2aJZ+1auOQSX+T87LPjiy0JIs2nLyKSKdV1/ZqNubfdBgsXwvTp0EZZKat0pS8iOVdSAv/6F3zyiX+NHw9HHglDh8YdWeHTe6qI5FzNuv6MGT4J2w03aCBWLijpi0jOVdf177rLZ9I86yzYa6+4o0oGlXdEJOfatfNlDx97zCdT++Uv444oOZT0RSQWqZR/v+QS2HHHWENJlEhJ38yGmtkCM1toZpsNmzCzIWY228yqzGxErftGmdl/0l+jMhW4iLRso0fDhRf6gCzJnQZr+mbWGrgVOAKoAGaa2bQQwrwauy0BRgMX13psZ+BKoBgIwKz0Y2uNxRORpOnRw2fRlNyKcqU/EFgYQlgUQlgLTAGG19whhLA4hDAX2FDrsUcBz4YQPkkn+mcBdcoSEYlJlKTfHVha43ZFelsUzXmsiIhkWJSkX1fP2RDx+JEea2ZjzKzMzMoqKysjHlpERBorStKvAHaucbsHsCzi8SM9NoQwKYRQHEIoLioqinhoERFprChJfyawm5n1NrO2wEhgWsTjPw0caWadzKwTcGR6m4iIxKDBpB9CqALG4sl6PjA1hFBuZuPNbBiAmQ0wswrgRGCimZWnH/sJ8Ev8jWMmMD69TUREYmAhRC3P50ZxcXEoKyuLOwwRkRbFzGaFEBpcVVgjckVEEiTvrvTNrBJ4L+44mqkL8HHcQeQRvR6b0uuxkV6LTTXn9dglhNBgT5i8S/qFwMzKonzMSgq9HpvS67GRXotN5eL1UHlHRCRBlPRFRBJEST87JsUdQJ7R67EpvR4b6bXYVNZfD9X0RUQSRFf6IiIJoqTfTGa2s5k9b2bzzazczM5Pb+9sZs+mF495Nj0NRSKYWWszm2Nmj6dv9zaz19OvxZ/S03kkgpltb2YPmdm/0+fIgQk/Ny5M/5+8ZWaTzax9ks4PM7vbzJab2Vs1ttV5Ppj7fXrxqrlmtl8mYlDSb74q4MchhD2BQcCPzKwfMA54LoSwG/Bc+nZSnI9P2VHtN8BN6dfiU+DsWKKKx83AUyGEPYBv4a9LIs8NM+sO/A9QHELoD7TG5/JK0vlxD5uvKbKl8+FoYLf01xjg9oxEEELQVwa/gL/iq4wtALqlt3UDFsQdW45+/x7pE/fbwOP49NofA23S9x8IPB13nDl6LToC75JuO6uxPannRvX6Gp3xVfsexxdaStT5AfQC3mrofAAmAqfUtV9zvnSln0Fm1gvYF3gd2CGE8AFA+nvX+CLLqd8BP2XjKmpfB1YEn7gPkrWQzq5AJfCHdLnrTjPbhoSeGyGE94Eb8OVVPwA+A2aR3POj2pbOh6wsQqWknyFmti3wMHBBCGFl3PHEwcy+AywPIcyqubmOXZPSZawNsB9wewhhX2AVCSnl1CVdqx4O9AZ2ArbBSxi1JeX8aEhW/neU9DPAzLbCE/4DIYRH0ps/MrNu6fu7Acvjii+HBgPDzGwxvpbyt/Er/+3NrE16n8YswtPSVQAVIYTX07cfwt8EknhuABwOvBtCqAwhrAMeAQ4iuedHtS2dD81ZwGqLlPSbycwMuAuYH0L4bY27pgGj0j+Pwmv9BS2E8LMQQo8QQi+8ge7vIYTTgOeBEendEvFaAIQQPgSWmtnu6U2HAfNI4LmRtgQYZGYd0v831a9HIs+PGrZ0PkwDStO9eAYBn1WXgZpDg7OaycwOBl4C3mRjHftSvK4/FeiJn+wnhgQtIGNmKeDiEMJ3zGxX/Mq/MzAHOD2EsCbO+HLFzPYB7gTaAouAM/GLrUSeG2b2C+BkvNfbHOAcvE6diPPDzCYDKXw2zY+AK4G/UMf5kH5jvAXv7bMaODOE0OzFRpT0RUQSROUdEZEEUdIXEUkQJX0RkQRR0hcRSRAlfRGRBFHSFxFJECV9EZEEUdIXEUmQ/w+BCSWMD9s1nwAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "\n",
    "plt.plot(Ks, np.array(CH_scores), 'b-')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
